* Firm organization with multiple establishments
* Section III.A: Appendix Table B.1 data preparations

clear all 
set matsize 2000
set more off, perm

capture log close
log using log/05_facts-p_MElocation_data.log, replace

********************************************************************************
***	CALCULATE MARKET POTENTIAL

use data/Distanzen_v2014.dta, clear

gen jahr = 1998
forvalues i = 1/12 {
	append using $data/Distanzen_v2014.dta
	replace jahr = 1998 + `i' if jahr == .
}

sort ao_kreis jahr
merge m:1 ao_kreis jahr using $data/Kreis_controls.dta, keep(1 3) keepusing(BIP)
drop _merge

gen w_BIP = (1/distance_all)*BIP
replace w_BIP = BIP if hq_kreis == ao_kreis
bys hq_kreis jahr: egen double MP = total(w_BIP)
label variable MP "Market potential, average distance weighted"

drop ao_kreis
rename hq_kreis ao_kreis

keep ao_kreis jahr MP
duplicates drop
save data/marketpotential.dta, replace

********************************************************************************
***	CALCULATE MEAN WAGES PER COUNTRY (EXCLUDING FIRM)

use data/Panel.dta if jahr >= 2000 & jahr <= 2010, clear

keep untid betnr jahr persnr tentgelt

merge m:1 betnr jahr using data/BHPinclUntID.dta, keep(3) keepusing(ao_kreis)
drop _merge

bys ao_kreis jahr: egen wag_kreis = total(tentgelt)
bys ao_kreis jahr: egen emp_kreis = count(persnr)

bys untid ao_kreis jahr: egen wag_kreis_unt  = total(tentgelt)
bys untid ao_kreis jahr: egen empl_kreis_unt = count(persnr)

gen s_unt = empl_kreis_unt / emp_kreis

gen mean_wage = wag_kreis/emp_kreis
gen aux = wag_kreis_unt/emp_kreis

gen mean_wage2 = (mean_wage - aux) / (1-s_unt)

keep untid jahr mean_wage* ao_kreis

duplicates drop
count if untid == .
count if jahr == .
count if ao_kreis == . | ao_kreis == .n | ao_kreis == .z
drop if ao_kreis == . | ao_kreis == .n | ao_kreis == .z
isid untid jahr ao_kreis

label variable mean_wage2 "Average wages in year and county excluding firm"
label variable mean_wage  "Average wages in year and county"

save data/wages_untid_aokreis_2000-2010.dta, replace

********************************************************************************
***	FILLIN DATA SET

use data/Panel.dta if jahr >= 2000 & jahr <= 2010, clear

keep untid betnr jahr ao_kreis persnr

bys untid jahr: egen empl_unt = count(persnr)
bys betnr jahr: egen empl_bet = count(persnr)

drop persnr
duplicates drop

isid betnr jahr

//	Restrict sample
bys untid jahr: egen count_est = count(betnr)
bys untid: egen ever_ME = max(count_est > 1)
keep if ever_ME == 1
drop ever_ME
drop count_est

qui gen aux = (empl_unt >= 10)
bys untid: egen sample = min(aux)
keep if sample == 1
drop sample aux

merge 1:1 betnr jahr using data/BHPinclUntID.dta, keep(3) keepusing(hauptbet)
drop _merge

bys untid jahr: egen count_haupt = total(hauptbet)

bys untid jahr: egen count_est = count(betnr)
gen d_multest = (count_est > 1)
tab2 count_haupt d_multest
drop if count_haupt == 0 & d_multest == 1
drop if count_haupt == 1 & d_multest == 0
drop d_multest count_est

// Rectangularize dataset to give every firm the option to invest in any county
sort untid jahr
egen untjhr_id = group(untid jahr)
gen inv = 1

fillin untjhr_id ao_kreis
replace inv = 0 if _fillin == 1

foreach variable in untid jahr empl_unt {
	bys untjhr_id (`variable'): replace `variable' = `variable'[_n-1] if `variable' == . & `variable'[_n-1] != . 
	count if `variable' == .
}

********************************************************************************
***	MERGE COVARIATES

//	Firm/establishment characteristics
merge m:1 betnr jahr using data/BHPinclUntID.dta, keep(1 3) keepusing(hq_kreis w93_3_gen)
drop _merge

merge m:1 untid jahr using data/Amadeus_Aug2018_untid.dta, keep(1 3) keepusing(Rechtsform_neu)
drop _merge 

qui gen aux_hq_wz93 = w93_3_gen if hauptbet == 1
bys untid jahr: egen hq_wz93 = min(aux_hq_wz)
drop aux_hq_wz
order hq_kreis hq_wz93, after(hauptbet)

sort untjhr_id
foreach variable in hq_kreis Rechtsform_neu {
	by untjhr_id: replace `variable' = `variable'[_n-1] if `variable' == . & `variable'[_n-1] != .
	count if `variable' == .
}

compress

bys untid: egen Rechtsform = max(Rechtsform_neu)
qui gen legal_form = .
qui replace legal_form = 1 if (Rechtsform == 8 | Rechtsform == 16 | Rechtsform == 17 | Rechtsform == 19 | Rechtsform == 29)
qui replace legal_form = 2 if (Rechtsform == 3 | Rechtsform == 11 | Rechtsform == 12 | Rechtsform == 14 | Rechtsform == 15)
qui replace legal_form = 3 if (Rechtsform == 13)
qui replace legal_form = 4 if (Rechtsform == 5 | Rechtsform == 22 | Rechtsform == 23 | Rechtsform == 24)
label define legal 1 "Einzelunternehmen" 2 "GmbH & coKG" 3 "GmbH" 4 "AG"
label values legal_form legal
tab legal_form, gen(d_legal)
drop d_legal1

//	Aggregate at county level
collapse (sum) empl_kreis=empl_bet (mean) inv d_legal*, by(untid jahr hq_wz93 hq_kreis empl_unt ao_kreis)

//	Distance
merge m:1 ao_kreis hq_kreis using data/Distanzen_v2014.dta, keep(1 3) keepusing(distance_all)
drop _merge

gen ldistall = ln(distance_all)

//	Market potential
sort ao_kreis jahr
merge m:1 ao_kreis jahr using data/marketpotential.dta
drop _merge
gen ln_MP = log(MP)

//	Wages
merge 1:1 untid jahr ao_kreis using data/wages_untid_aokreis_2000-2010.dta, keep(1 3)
drop _merge

rename mean_wage aux
bys ao_kreis jahr: egen mean_wage = max(aux)
drop aux
qui replace mean_wage2 = mean_wage if inv == 0
count if mean_wage != mean_wage2
count if mean_wage != mean_wage2 & inv != 1
drop mean_wage

rename mean_wage2 mean_wage

gen aux = mean_wage if ao_kreis == hq_kreis
bys untid jahr: egen hq_mean_wage = max(aux)
drop aux
gen rel_wage =  mean_wage/hq_mean_wage

//	Land prices
merge m:1 ao_kreis jahr using data/Kreis_controls.dta, keep(1 3) keepusing(Preis_pqm_baureif)
drop _merge

qui gen aux = Preis_pqm_baureif if ao_kreis == hq_kreis
qui egen hq_Preis_pqmbau = max(aux), by(untid jahr)
drop aux
gen rel_Preis =  Preis_pqm_baureif /hq_Preis_pqmbau
drop hq_Preis_pqmbau

keep untid jahr empl_unt hq_kreis hq_wz93 d_legal2 d_legal3 d_legal4 ao_kreis inv empl_kreis ldistall ln_MP rel_wage rel_Preis 
order untid jahr empl_unt hq_kreis hq_wz93 d_legal2 d_legal3 d_legal4 ao_kreis inv empl_kreis ldistall ln_MP rel_wage rel_Preis

qui gen ln_empl_kreis = log(empl_kreis) 
qui tab jahr, gen(d_year)
qui tab hq_wz93, gen(d_93wz) 
qui tab hq_kreis, gen(d_hqkreis)
qui tab ao_kreis, gen(d_aokreis)

save data/MEloc_2000-2010_p.dta, replace
erase data/marketpotential.dta
erase data/wages_untid_aokreis_2000-2010.dta

log close
